# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request
from zc_core.model.items import Box
from zc_core.util.batch_gen import time_to_batch_no
from crccmall.rules import *
from datetime import datetime
from zc_core.util.http_util import *
from scrapy.utils.project import get_project_settings
from zc_core.spiders.base import BaseSpider


class SkuSpider(BaseSpider):
    """Spider that collects SPU listings for each configured supplier.

    Request chain implemented by the methods below:

    1. ``start_requests``: one portal request per entry of the ``SUPPLIERS``
       setting.
    2. ``parse_total_page``: extracts the company id from the portal response
       and requests that company's category tree.
    3. ``parse_content``: emits the categories and requests page 1 of the
       listing for every level-3 category.
    4. ``parse_sku_content_deal``: emits the listing rows and, on page 1 only,
       fans out the requests for the remaining pages.

    ``parse_company_id``, ``parse_catalog``, ``parse_sku`` and the module-level
    ``parse_total_page`` are not defined in this file; they presumably come
    from the star import of ``crccmall.rules`` -- confirm there.
    """
    name = 'spu_supplier'
    # Portal endpoint, formatted with a supplier id taken from SUPPLIERS.
    index_url = 'https://www.crccmall.com/api/portal/ecPortal/get?uuids={}'
    # Category tree endpoint, formatted with a company id.
    catalog_url = 'https://www.crccmall.com/api/merchandise/ecGoods/getGoodsClassFromSolr?companyId={}'
    # Product listing endpoint, formatted with (company id, page, category id).
    sku_list_url = "https://www.crccmall.com/crccmall_solr/crccGoods/searchGoodsFromSolr?companyId={}&classType=0&current={}&pageSize=12&gc_ids={}&startPrice=&endPrice=&mail_flag=&orderBy=&reverse=&goods_keywords=&brand_ids="

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider and cache the level-1 category whitelist.

        :param batchNo: batch number forwarded unchanged to ``BaseSpider``.
        """
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # NOTE(review): page_size and max_page_limit are not read anywhere in
        # this class (sku_list_url hard-codes pageSize=12). They are kept in
        # case the base class or other code reads them -- confirm before
        # removing.
        self.page_size = 20
        self.max_page_limit = 100
        self.settings = get_project_settings()
        # When this setting is non-empty, only level-1 categories whose name
        # is contained in it are crawled (see parse_content).
        self.SKU1_WHITE_LIST = self.settings.get('SKU1_WHITE_LIST')

    def _build_list_req(self, url, catalog1Id, catalog1Name, catalog2Id, catalog2Name, catalog3Id, catalog3Name, page,
                        company_id, supplierName):
        """Build a listing-page request carrying the category path in meta.

        :param url: fully formatted listing URL.
        :param catalog1Id: id of the level-1 category.
        :param catalog1Name: name of the level-1 category.
        :param catalog2Id: id of the level-2 category.
        :param catalog2Name: name of the level-2 category.
        :param catalog3Id: id of the level-3 category.
        :param catalog3Name: name of the level-3 category.
        :param page: page number stored in meta; page 1 triggers pagination.
        :param company_id: company id stored in meta as ``companyId``.
        :param supplierName: supplier name stored in meta.
        :return: a ``Request`` handled by ``parse_sku_content_deal``.
        """
        return Request(
            url=url,
            meta={
                'reqType': 'spu',
                'batchNo': self.batch_no,
                'page': page,
                'catalog1Id': catalog1Id,
                'catalog1Name': catalog1Name,
                'catalog2Id': catalog2Id,
                'catalog2Name': catalog2Name,
                'catalog3Id': catalog3Id,
                'catalog3Name': catalog3Name,
                'companyId': company_id,
                'supplierName': supplierName
            },
            callback=self.parse_sku_content_deal,
            errback=self.error_back,
            dont_filter=True,
        )

    def start_requests(self):
        """Yield one portal request per supplier in the ``SUPPLIERS`` setting.

        ``SUPPLIERS`` is read as a mapping of supplier name to supplier id.
        """
        settings = get_project_settings()
        suppliers = settings.get('SUPPLIERS', {})
        # The loop variable used to be called `id`, shadowing the builtin.
        for supplier_name, supplier_id in suppliers.items():
            yield Request(
                url=self.index_url.format(supplier_id),
                meta={
                    'reqType': 'spu',
                    'batchNo': self.batch_no,
                    'supplierName': supplier_name,
                    'supplierId': supplier_id,
                },
                callback=self.parse_total_page,
                errback=self.error_back,
                dont_filter=True,
            )

    # Resolve the company id
    def parse_total_page(self, response):
        """Extract the company id and request that company's category tree.

        Despite its name this callback does not deal with page counts; the
        page count is parsed by the module-level ``parse_total_page`` function
        called from ``parse_sku_content_deal``.
        """
        meta = response.meta
        supplier_name = meta.get('supplierName')
        supplier_id = meta.get('supplierId')
        company_id = parse_company_id(response)
        if company_id is None:
            # Formatting None into the URL would request "companyId=None".
            self.logger.warning('company id not found: supplier=%s, supplierId=%s', supplier_name, supplier_id)
            return
        yield Request(
            url=self.catalog_url.format(company_id),
            meta={
                'reqType': 'spu',
                'batchNo': self.batch_no,
                'supplierName': supplier_name,
                'supplierId': supplier_id,
                'companyId': company_id,
            },
            callback=self.parse_content,
            errback=self.error_back,
            dont_filter=True,
        )

    def parse_content(self, response):
        """Emit the category list and request page 1 of every level-3 category.

        A level-3 category whose level-2 or level-1 ancestor is absent from the
        parsed list is skipped with a warning, so one broken entry no longer
        aborts the remaining categories of the supplier.
        """
        meta = response.meta
        company_id = meta.get('companyId')
        supplier_name = meta.get('supplierName')
        cats = parse_catalog(response)
        if not cats:
            return

        self.logger.info('品类: count[%s]', len(cats))
        yield Box('catalog', self.batch_no, cats)

        # Index categories by id once instead of rescanning the list for each
        # ancestor lookup. setdefault keeps the FIRST entry per id, which is
        # what the previous "[...][0]" scans selected.
        cats_by_id = {}
        for cat in cats:
            cats_by_id.setdefault(cat.get('catalogId'), cat)

        for cat in cats:
            if cat.get('level') != 3:
                continue

            catalog3_id = cat.get('catalogId')
            catalog3_name = cat.get('catalogName')

            catalog2_id = cat.get('parentId')
            level2 = cats_by_id.get(catalog2_id)
            if level2 is None:
                self.logger.warning('catalog skipped, level-2 parent missing: cat=%s, parent=%s',
                                    catalog3_id, catalog2_id)
                continue
            catalog2_name = level2.get('catalogName')

            catalog1_id = level2.get('parentId')
            level1 = cats_by_id.get(catalog1_id)
            if level1 is None:
                self.logger.warning('catalog skipped, level-1 parent missing: cat=%s, parent=%s',
                                    catalog3_id, catalog1_id)
                continue
            catalog1_name = level1.get('catalogName')

            # An empty or unset whitelist means "crawl every category".
            if self.SKU1_WHITE_LIST and catalog1_name not in self.SKU1_WHITE_LIST:
                continue

            yield self._build_list_req(self.sku_list_url.format(company_id, 1, catalog3_id),
                                       catalog1_id, catalog1_name,
                                       catalog2_id, catalog2_name,
                                       catalog3_id, catalog3_name,
                                       1, company_id, supplier_name)

    # Handle one page of the listing
    def parse_sku_content_deal(self, response):
        """Emit the rows of one listing page; on page 1 also request the rest.

        Pages 2..total are requested only from the page-1 response, so each
        category fans out exactly once.
        """
        meta = response.meta
        company_id = meta.get('companyId')
        catalog1_name = meta.get('catalog1Name')
        catalog1_id = meta.get('catalog1Id')
        catalog2_name = meta.get('catalog2Name')
        catalog2_id = meta.get('catalog2Id')
        catalog3_name = meta.get('catalog3Name')
        catalog3_id = meta.get('catalog3Id')
        cur_page = meta.get('page')
        supplier_name = meta.get('supplierName')

        sku_list = parse_sku(response)
        self.logger.info("清单: cat=%s, page=%s", catalog3_id, cur_page)
        if not sku_list:
            self.logger.info('空页: cat=%s, page=%s', catalog3_id, cur_page)
            return

        yield Box('spu', self.batch_no, sku_list)

        if cur_page != 1:
            return

        # Inside a method this bare name resolves to the module-level
        # parse_total_page function, not to the method of the same name.
        total_pages = parse_total_page(response)
        self.logger.info("清单1: cat=%s, total_page=%s", catalog3_id, total_pages)
        # A missing page count (None) is treated as "no further pages" rather
        # than raising TypeError on the addition.
        last_page = total_pages or 0
        for page in range(2, last_page + 1):
            yield self._build_list_req(self.sku_list_url.format(company_id, page, catalog3_id),
                                       catalog1_id, catalog1_name,
                                       catalog2_id, catalog2_name,
                                       catalog3_id, catalog3_name,
                                       page, company_id, supplier_name)
